## Compositional time series analysis of state budgets
## using seemingly unrelated regression of additive logratios
## with optional lags and state fixed effects
## Christopher Adolph, Christian Breunig, and Chris Koski
## 3 March 2018
##                                        
## Descriptive statistics for Figure 1 and Table A2
## Panel unit root tests
##

                                        
# Clear memory
rm(list = ls())

#######################################################################
# User settings
# (Create a new file for each composition + specification)
dsname <- "dataABK.csv"    # Must be comma-seperated variable file
specname <- "Model1descriptives"

lhs <- c("K12Ed",
         "Welfare",
         "Medical",         
         "NaturalResources",
         "HigherEd",
         "Highways",
         "PublicSafety",
         "Miscellaneous"
         )

lhsNice <- c("K-12\nEducation",
             "Medicaid\n& Welfare",
             "Public Health\n& Hospitals",
             "Natural\nResources",
             "Higher\nEducation",
             "Highways",
             "Police\n& Prisons",
             "Other\nSpending"
             )

lhsWide <- c("K-12",
             "Medicaid & Welfare",
             "Public Health & Hospitals",
             "Natural Resources",
             "Higher Ed",
             "Highways",
             "Police & Prisons",
             "Other Spending"
             )

## Covariates
rhs <- c("ACIR",
         "govadd",
         "DUG", "RUG",
         "rpincpc",
         "popdensity",
         "Unemployment",
         "South","Northeast","West",
         "trend",
         "pctunder19",
         "pctover65"
         )
                              # Covariates
                              # Note: interactions must be constructed in
                              # advance (i.e., exist as variables in the
                              # datafile)
numlags <- 1                  # Number of lags in model
csfe <- 0                     # Include state fixed effects? (0/1)
csvar <- "State"              # Name of state variable
csvarid <- "FIPS"             # Name of state id number variable
tsvar <- "Year"               # Name of time variable
sims <- 100000                 # Num of simulations for post-estimation
ci <- c(0.95, 0.90)           # Confidence intervals of first diffs
simperiods <- 4               # Periods predicted ahead in simulations
plotperiod <- simperiods      # Period plotted in ropeladders
xlagnum <- 1                  # Lag for covariates
# User must also supply counterfactuals below
#######################################################################


# Load libraries (make sure these are available to R)
library(tile)
library(simcf)
# Download tile and simcf from:
#  http://faculty.washington.edu/cadolph/software
library(compositions)
library(MASS)
library(systemfit)
source("helper.r")     # Keep this file in working directory

# Load data
data <- read.csv(dsname, na.strings = "NA", header=TRUE)

## Delete early years
data <- data[data$Year>=1983,]

## Delete NAs in 2010
data <- data[data$Year<2010,]

## Remove Alaska and Hawaii
data <- data[!(data$State=="AK")&!(data$State=="HI")&!(data$State=="NE"),]

attach(data)
c <- getvar(csvarid)
t <- getvar(tsvar)

# Set endogenous variables
components <- getvar(lhs)       # need to fix var names
components <- clo(components)   # close the composition
alrcomp <- alr(components)      # convert to centered logratios
alrcomp.names <- names(alrcomp)
for (i in 1:length(alrcomp.names))
  alrcomp.names[i] <- paste(alrcomp.names[i],"LR",sep="")
names(alrcomp) <- alrcomp.names
datarun <- alrcomp
ncomp <- length(lhs)
compavg <- apply(na.omit(components),2,"mean")

# Make lags of response variables
lagcomp <- lagcomp.names <- NULL
for (i in 1:ncol(alrcomp)) {
  for (j in 1:numlags) {
    curlag <- paste("lag",j,sep="")
    lagcomp <- cbind(lagcomp,lagpanel(alrcomp[,i],c,t,j))
    lagcomp.names <- c(lagcomp.names,paste(names(alrcomp)[i],curlag,sep=""))
  }
}
colnames(lagcomp) <- lagcomp.names
datarun <- cbind(datarun,as.data.frame(lagcomp))

# Add covariates and constant
xcovariates <- getvar(rhs)
lagcovariates <- as.data.frame(lagpanel(xcovariates,c,t,xlagnum))
names(lagcovariates) <- names(xcovariates)
xcovariates <- lagcovariates
datarun <- cbind(datarun,xcovariates)
constant <- as.data.frame(matrix(data=1,nrow=nrow(datarun),ncol=1))
names(constant) <- "constant"
datarun <- cbind(datarun,constant)
numcovariates <- length(rhs)

# Add fixed effects
if (csfe) {
  stateid <- getvar(csvar)
  cslist <- unique(stateid)
  ncs <- nrow(cslist)
  dummies <- matrix(nrow=nrow(datarun),ncol=nrow(cslist))
  for (i in 1:nrow(datarun))
    dummies[i,] <- as.numeric(as.vector(stateid[i,1])==as.vector(cslist))
  colnames(dummies) <- as.vector(t(cslist))
  datarun <- cbind(datarun,as.data.frame(dummies))
} else {
  cslist <- "constant"
}

# Construct system of equations
eq <- NULL
for (i in 1:ncomp-1) {
  eq[i] <- paste(names(datarun)[i],"~","- 1")
  for (j in 1:numlags) {
    curlagnames <- lagcomp.names[(i-1)*numlags+1:(i*numlags)]
    eq[i] <- paste(eq[i],"+",curlagnames[j])
  }
  for (j in 1:numcovariates) {
    eq[i] <- paste(eq[i],"+",rhs[j])
  }
  if (csfe) {
    for (k in 1:ncs) {
      eq[i] <- paste(eq[i],"+",cslist[k,1])
    }
  } else {
    eq[i] <- paste(eq[i],"+","constant")
  }
}
eqns <- list(comp1="y~x")
eqnnames <- NULL
ieq <- 1
while(ieq<ncomp) {
  ieqn <- paste("comp",ieq,sep="")
  eqnnames <- c(eqnnames,ieqn)
  cureq <- eq[ieq]
  eqns[[ieqn]] <- cureq
  eqns[[ieqn]] <- as.formula(eqns[[ieqn]])
  ieq <- ieq+1
}


# Listwise deletion of missings
datarun <- na.omit(datarun)

# Run SUR
sur.res1 <- systemfit("SUR",
                      formula=eqns,
                      data=datarun,
                      control = systemfit.control(maxiter=100)
                      )
print(summary(sur.res1))


### Panel unit root tests
library(plm)

selectdata <- as.data.frame(alrcomp)

selectdata <- cbind(State, Year, selectdata)
selectdata <- selectdata[selectdata$Year>1983,]

pval <- rep(NA, ncol(alrcomp))
for (i in 1:ncol(alrcomp)) {

    curseries <- data.frame(split(selectdata[,(2+i)], as.character(selectdata$State)))
    
    urtest <- purtest(curseries, pmax = 4, exo = "trend", test = "ips")
    print(urtest)
    pval[i] <- urtest$statistic[6]
}

pval <- unlist(pval)


### Descriptive table
components <- getvar(lhs)
components <- components/apply(components, 1, sum)
xcovariates <- getvar(c(rhs, "RealPctChBudgetPC"))

sumdata <- cbind(components, xcovariates)

summaries <- cbind(apply(sumdata, 2, min),
      apply(sumdata, 2, quantile, probs=0.25),
      apply(sumdata, 2, median),
      apply(sumdata, 2, mean),
      apply(sumdata, 2, sd),
      apply(sumdata, 2, quantile, probs=0.75),
      apply(sumdata, 2, max)
      )
write.csv(summaries, "sumdata.csv")


### Descriptive graphic

lhs <- c("Welfare",
         "K12Ed",
         "HigherEd",
         "Miscellaneous",
         "Highways",
         "Medical",         
         "PublicSafety",
         "NaturalResources"
         )

lhsWide <- c("Medicaid & Welfare",
             "K-12 Education",
             "Higher Education",
             "Other Spending",
             "Highways",
             "Public Health & Hospitals",
             "Police & Prisons",
             "Natural Resources"
             )

describedata <- data[,lhs]/apply(data[,lhs],1,sum)
describedata <- cbind(State, Year, describedata)
describedata <- describedata[describedata$Year>1983,]

compMeans <- apply(describedata[,3:ncol(describedata)],2,mean)
compSDs <- apply(describedata[,3:ncol(describedata)],2,sd)
compMeans1984 <- apply(describedata[describedata$Year==1984,3:ncol(describedata)],2,median)
compMeans2009 <- apply(describedata[describedata$Year==2009,3:ncol(describedata)],2,median)

## Make a matrix of each quantile for each component (columns) and year (rows)
uYear <- unique(describedata$Year)
nYear <- length(uYear)
nComp <- length(lhsWide)
meanComponents <- medianComponents <- q10Components <- q25Components <- q75Components <-
    q90Components <- matrix(NA, nrow=nYear, ncol=nComp)
for (i in 1:nComp) {
    curseries <- data.frame(split(describedata[,(2+i)], as.character(describedata$State)))
    for (j in 1:nYear) {
        meanComponents[j,i] <- mean(as.numeric(curseries[j,]))
        medianComponents[j,i] <- median(as.numeric(curseries[j,]))
        q10Components[j,i] <- quantile(as.numeric(curseries[j,]), probs=0.10)
        q25Components[j,i] <- quantile(as.numeric(curseries[j,]), probs=0.25)
        q75Components[j,i] <- quantile(as.numeric(curseries[j,]), probs=0.75)
        q90Components[j,i] <- quantile(as.numeric(curseries[j,]), probs=0.90)
    }
}

## Make traces
xoffsetlab <- c(8.75, 7.25,  7.75, 7.5,  5, 11.5,  7.5, 8.75)
xmarks <- uYear - min(uYear) + 1
offset <- 7
curplot <- 0
collectedtraces <- collectedtitles <- collectedmeans <- collectedstarts <- 
    collectedsds <- collectedends <- collectedCIs1 <- collectedCIs2 <- vector("list", nComp)
for (i in 1:nComp) {

    secondplot <- FALSE
    if (i%%2) { curplot <- curplot + 1 } else {secondplot <- TRUE}
    
    
    collectedtraces[[i]] <- lineplot(x=xmarks + secondplot*(offset + max(xmarks)),
                                     y=medianComponents[,i],
                                     lineend="square",
                                     plot=curplot)

    collectedCIs1[[i]] <- polygonTile(x=c(xmarks + secondplot*(offset + max(xmarks)),
                                          rev(xmarks + secondplot*(offset + max(xmarks)))),
                                     y=c(q10Components[,i], rev(q90Components[,i])),
                                     col=NA,
                                     fill="gray75",
                                     plot=curplot)

    collectedCIs2[[i]] <- polygonTile(x=c(xmarks + secondplot*(offset + max(xmarks)),
                                          rev(xmarks + secondplot*(offset + max(xmarks)))),
                                     y=c(q25Components[,i], rev(q75Components[,i])),
                                     col=NA,
                                     fill="gray45",
                                     plot=curplot)
                                     
    collectedtitles[[i]] <- textTile(labels=lhsWide[i],
                                     x=xoffsetlab[i] + secondplot*(offset + max(xmarks)),
                                     y=(i<=4)*exp(log(0.02) + 0.5*(log(0.05) - log(0.02))) +
                                         (i>4)*exp(log(.2) + 0.5*(log(.5) - log(.2))),
                                     cex=0.8,  #65
                                     plot=curplot)

    if (100*compMeans1984[i]>=10) {
        startlab <- paste0(sprintf("%1.0f", 100*compMeans1984[i]), "%")
        laboffset <- 1.75
    } else {
        startlab <- paste0(sprintf("%1.1f", 100*compMeans1984[i]), "%")
        laboffset <- 1.9
    }
    collectedstarts[[i]] <- textTile(labels=startlab,
                                   x=min(xmarks) - laboffset + secondplot*(offset + max(xmarks)),
                                   y=meanComponents[1,i],
                                   cex=0.65,
                                   clip="off",
                                   plot=curplot)

    if (100*compMeans2009[i]>=10) {
        endlab <- paste0(sprintf("%1.0f", 100*compMeans2009[i]), "%")
        laboffset <- 1.75
    } else {
        endlab <- paste0(sprintf("%1.1f", 100*compMeans2009[i]), "%")
        laboffset <- 1.9
    }
    collectedends[[i]] <- textTile(labels=endlab,
                                   x=max(xmarks)+ laboffset + secondplot*(offset + max(xmarks)),
                                   y=meanComponents[nrow(meanComponents),i],
                                   cex=0.65,
                                   plot=curplot)

}

legendBox <- polygonTile(x=c(15,15,30,30) + offset + max(xmarks) + 1,
                         y=c(0.015, 0.074, 0.074, 0.015),
                         col=NA,
                         fill="white",
                         alpha=1,
                         layer=5,
                         plot=1)

legendText <- textTile(labels=c("median", "middle 50%", "middle 90%"),
                       x=rep(25, 3) + offset + max(xmarks) + 1,
                       y=c(exp(log(0.01) + 6/3 -0.05),
                           exp(log(0.01) + 4/3 -0.05),
                           exp(log(0.01) + 2/3 -0.05)),
                       cex=0.65,
                       layer=4,
                       plot=1)

legendLine <- linesTile(x=c(17.5, 19.75) + offset + max(xmarks) + 1,
                        y=rep(exp(log(0.01) + 6/3 -0.05), 2),
                        size=2,
                        lex=1,
                        lineend="square",
                        alpha=1,
                        layer=2,
                        plot=1)

legendShade1 <- polygonTile(x=c(17.5, 17.5, 19.75, 19.75) + offset + max(xmarks) + 1,
                            y=c(exp(log(0.01) + 4/3 + 0.125 -0.05), exp(log(0.01) + 4/3 - 0.125 -0.05),
                                exp(log(0.01) + 4/3 - 0.125 -0.05), exp(log(0.01) + 4/3 + 0.125 -0.05)),
                            col=NA,
                            fill="gray45",
                            layer=3,
                            plot=1)

legendShade2 <- polygonTile(x=c(17.5, 17.5, 19.75, 19.75) + offset + max(xmarks) + 1,
                            y=c(exp(log(0.01) + 2/3 + 0.125 -0.05), exp(log(0.01) + 2/3 - 0.125 -0.05),
                                exp(log(0.01) + 2/3 - 0.125 -0.05), exp(log(0.01) + 2/3 + 0.125 -0.05)),
                            col=NA,
                            fill="gray75",
                            layer=3,
                            plot=1)

# Send to tile
xat <- seq(1984,2009,4)
xlabs <- paste("'",substr(xat,3,4),sep="")
xat <- xat - min(xat) + 1
xat <- c(xat, xat+offset + max(xat) + 1)
xlabs <- c(xlabs, xlabs)

yat <- c(0.01, 0.02, 0.05, 0.1, 0.2, 0.5)
ylabs <- paste0(yat*100, "%")

file <- "Figure1"

tile(collectedtraces,
     collectedtitles,
     collectedends,
     collectedstarts,
     collectedCIs1,
     collectedCIs2,
     legendBox, legendText, legendLine,
     legendShade1, legendShade2,
     RxC=c(4,1),
     limits=c(-2.5, 2*max(xmarks)+1.6*offset, 0.01, 0.55),
     xaxis=list(cex=.75, at=xat, labels=xlabs),
     yaxis=list(cex=.75, major=FALSE, log=TRUE, at=yat, labels=ylabs),
     gridlines=list(type="y"),
     height=list(plot=0.225, topborder=1, bottomborder=0, spacer=0.0),
     width=list(spacer=1,null=2, yaxis.labelspace=-0.5,
       leftborder=0, rightborder=0),
     output=list(file=file, width=5.5, type="pdf")
     ) 


